In [2]:
from datetime import datetime
start = datetime.now()
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from itertools import combinations
from collections import defaultdict, Counter
from sklearn.feature_extraction import DictVectorizer

Global Variables


In [4]:
BRANDS_LIST = list()
N_BRANDS = int()
N_FOLLOWERS = int()
DV = DictVectorizer(dtype=int)

Loading Data to Memory


In [5]:
def load_data(in_file, brands_to_load = None, max_limit = 1404, verbose = False):
    """Loads data from the given data file.
    If the brands to be loaded is less than the maximum rows in the datafile, randomize the loading.
    Args:
      brands_to_load .... Number of brands to be loaded.
      in_file       .... A string representing the location and datafile name to be loaded.
    Returns:
      None. The data is loaded into the global variables."""

    if brands_to_load is None:
        brands_to_load = max_limit 
    
    #Randomize Loading
    if brands_to_load != max_limit:
        choices = sorted(random.sample(range(max_limit), brands_to_load))
        max_row_id = max(choices)
    else:
        choices = range(max_limit)
        max_row_id = max_limit-1
        
    brand_no = 0
    cur_idx = 0
    brand_followers = list()
    
    with open(in_file) as data_file:
        for row in data_file:
            if brand_no > max_row_id:
                break
            if brand_no != choices[cur_idx]:
                brand_no += 1
                continue
            else:
                followers = row.split()
                brand = followers.pop(0)
                followers_count = len(followers)
            
                #Ignoring brands with less than 1000 followers
                if followers_count > 1000:  
                    
                    followers_dict = Counter(followers)
                    brand_followers.append(followers_dict)
                    BRANDS_LIST.append(brand)
                    
                    if verbose:
                        print('Loaded',brand,'-',followers_count,'followers.')
                else:
                    if verbose:
                        print('Skipped',brand,'-',followers_count,'followers.')
            
                brand_no += 1
                cur_idx += 1
    
    data = DV.fit_transform(brand_followers)
    
    return data

In [6]:
brands_to_load = 100
max_rows = 1404
verbose = True

BF_DATA = load_data('data/brand_followers_final.tsv', brands_to_load, max_rows, verbose)
N_BRANDS,N_FOLLOWERS = BF_DATA.get_shape()
print('\nNo. of Brands', N_BRANDS)
print('No. of Unique Followers', N_FOLLOWERS)


Loaded 20thcenturyfox - 500001 followers.
Loaded acer - 12345 followers.
Loaded activia - 1970 followers.
Loaded aquaphorus - 17640 followers.
Loaded arcona_la - 1282 followers.
Loaded athenos - 8253 followers.
Loaded bayescleaners - 8585 followers.
Loaded bhcosmetics - 50963 followers.
Loaded birdseye - 12701 followers.
Loaded blue_bunny - 31086 followers.
Loaded bobatl - 500001 followers.
Loaded bodyimage - 3262 followers.
Loaded buitoniusa - 7430 followers.
Loaded bummis - 9345 followers.
Loaded butterfinger - 30784 followers.
Loaded bwaynails - 30929 followers.
Loaded caneandaustin - 1121 followers.
Loaded chefmartinyan - 1856 followers.
Loaded cosmermaid - 6270 followers.
Loaded cuddleclones - 1534 followers.
Loaded davinesofficial - 3997 followers.
Loaded dial - 7460 followers.
Loaded dreamdrystyle - 4115 followers.
Loaded drysolecuador - 1638 followers.
Loaded eckounltd - 18914 followers.
Loaded eco_me - 1609 followers.
Loaded embryolisseusa - 3304 followers.
Loaded essentiaphwater - 15306 followers.
Loaded evelom - 5038 followers.
Loaded feelgoodfoods - 2614 followers.
Loaded fflpartnership - 4560 followers.
Loaded footsmart - 1535 followers.
Loaded freschetta - 11151 followers.
Loaded gonecrackers - 6762 followers.
Loaded guayaki - 7100 followers.
Loaded highoctaneinfo - 6112 followers.
Loaded honesttea - 26700 followers.
Loaded horizon_organic - 6351 followers.
Loaded hypehair - 5799 followers.
Loaded iams - 14421 followers.
Loaded ito_en - 4136 followers.
Loaded jonel_aleccia - 2104 followers.
Loaded jrwnaturals - 4874 followers.
Loaded kaplanmd - 2627 followers.
Loaded kimkardashian - 500001 followers.
Loaded kmscaliforniauk - 1852 followers.
Loaded labseries - 2112 followers.
Loaded lunabar - 35374 followers.
Loaded macadamiahair - 4150 followers.
Loaded melloyello - 6444 followers.
Loaded mieleusa - 2975 followers.
Loaded missionstfood - 20869 followers.
Loaded mommadefoods - 11342 followers.
Loaded momsbestcereal - 3041 followers.
Loaded muirglenorganic - 1431 followers.
Loaded mybasis - 8307 followers.
Loaded naturespath - 30140 followers.
Loaded naturesvariety - 3331 followers.
Loaded nestlelalechera - 3697 followers.
Loaded neweracap - 130824 followers.
Loaded nuancehealth - 4975 followers.
Loaded nutrasumma - 1178 followers.
Loaded nutrilitehealth - 45481 followers.
Loaded nyxcosmetics - 215971 followers.
Loaded oikos - 17457 followers.
Loaded olayphilippines - 6499 followers.
Loaded oldelpasouk - 4803 followers.
Loaded onetouch - 8480 followers.
Loaded organicfoodbar - 12779 followers.
Loaded pacificfoods - 14007 followers.
Loaded plantronics - 22109 followers.
Loaded prescriptives - 2862 followers.
Loaded quiltednorthern - 6485 followers.
Loaded real_babylux - 500001 followers.
Loaded renefurtererus - 1079 followers.
Loaded rightguardus - 1282 followers.
Loaded ripitupnz - 7134 followers.
Loaded rococochocs - 7064 followers.
Loaded sibu_beauty - 6222 followers.
Loaded skinauthority - 2691 followers.
Loaded skittles - 170036 followers.
Loaded southbeachdiet - 12861 followers.
Loaded sprite - 125301 followers.
Loaded starburst - 41988 followers.
Loaded suavebeauty - 18208 followers.
Loaded success_rice - 3173 followers.
Loaded swakdesigns - 5896 followers.
Loaded thebalmid - 1629 followers.
Loaded thefrownies - 11960 followers.
Loaded themarklong - 65512 followers.
Loaded thesimplegreen - 3599 followers.
Loaded tmobile - 345526 followers.
Loaded tofuttibrand - 4563 followers.
Loaded tresemme - 44890 followers.
Loaded tullys_shops - 12061 followers.
Loaded tulsashock - 13000 followers.
Loaded turkeyhillmm - 1117 followers.
Loaded udderlysmooth - 13003 followers.
Loaded ulta_beauty - 217454 followers.

No. of Brands 99
No. of Unique Followers 3662987

No. of Brands Followed Vs No. of Followers


In [16]:
hist, bins = np.histogram(sum(BF_DATA).toarray(),bins=50)    
width = 0.9 * (bins[1] - bins[0])
center = (bins[:-1] + bins[1:]) / 2
plt.yscale("log")
plt.bar(center, hist, align='center', width=width)
plt.title('Brands Followed Vs Followers Count')


Out[16]:
<matplotlib.text.Text at 0x1c8cf66dd8>

In [6]:
x = sum(BF_DATA).toarray()
followers = np.where(x >= 5)[1]
print('Unique Followers Pool',len(followers)) 
print('\nSample Values:')
print('\tFollower Idx:',followers[0], 'Follower ID:', DV.feature_names_[followers[0]])
print('\tBrands Followed:', len(BF_DATA.getcol(followers[0]).nonzero()[0]))


Unique Followers Pool 147273

Sample Values:
	Follower Idx: 8 Follower ID: 1000002090
	Brands Followed: 7

In [7]:
pct = 0.005
follower_ind = random.sample(list(followers), int(len(followers) * pct))
print('Sampled',len(follower_ind),'followers')
print('Indices:',follower_ind)


Sampled 736 followers
Indices: [122135, 7541638, 4162971, 4455724, 4037900, 2619512, 5575530, 2412347, 3823513, 2991079, 7505946, 3456850, 4863084, 945506, 4589273, 608781, 1617904, 3963708, 7438623, 7391759, 6423030, 1167362, 326457, 4154920, 505041, 3697819, 3510814, 1870199, 1870646, 1623842, 6482962, 183730, 607345, 7576177, 1000359, 2759826, 6790695, 7148131, 2697733, 3595210, 625880, 1644062, 2123767, 4402982, 2773610, 1756431, 830359, 5069041, 427124, 4055957, 2540593, 1741430, 1789752, 2898680, 7133688, 2815593, 163331, 3622491, 1697681, 3698247, 3456610, 582260, 6818833, 6906597, 7062060, 4091701, 2531413, 1535829, 5851641, 1672627, 4155762, 3562149, 1563437, 4678491, 2761654, 133229, 6162087, 2599251, 3203566, 5104059, 3683507, 7179027, 3091782, 1394201, 3770578, 5984212, 1043369, 616705, 4930991, 6885797, 1078348, 4778289, 2395435, 3924562, 2738323, 3447414, 5904057, 6989171, 5429648, 6668732, 2331113, 1591003, 6224831, 2492112, 3357288, 1042348, 3026294, 4345336, 326884, 4955528, 417101, 834372, 6758589, 5348296, 3804875, 7114551, 4209860, 3801864, 4612929, 1365865, 3171952, 100527, 5296430, 2643537, 1755327, 3256351, 2724019, 5848717, 4521330, 2729270, 114093, 1606946, 1455167, 3051078, 142873, 5157047, 924231, 2448399, 4994073, 7231844, 5325252, 6900704, 3773237, 2688481, 137112, 4043277, 2824153, 7397365, 64754, 2379101, 2038173, 5005835, 1518546, 209951, 1299422, 5068376, 6949500, 369004, 1070810, 7051668, 7282805, 1899037, 1537247, 2058819, 3677270, 5067416, 6064199, 5183834, 3187150, 3765815, 4148508, 4435995, 2916433, 1554729, 5355186, 4572389, 4870972, 6632689, 1539575, 2522159, 6599186, 4890613, 4278225, 1396257, 5691867, 91087, 2509823, 6790197, 4224639, 7366135, 4041160, 6161160, 2972708, 1347635, 6255153, 2456173, 4369919, 2035064, 7192381, 917033, 65027, 7299697, 4302738, 6038286, 1393705, 5420219, 6859925, 2534836, 7198750, 1025580, 6940985, 7514609, 1119678, 7286466, 5018133, 5527429, 4414994, 4672356, 7605833, 4200290, 3404243, 2143113, 3370069, 1199563, 3734270, 2094975, 2758277, 2487443, 1733278, 7134990, 3855050, 1724549, 1551312, 6795949, 2140950, 2829855, 6360162, 6079510, 2001945, 3182492, 595081, 5140066, 7291724, 5566597, 5988512, 4561803, 7217886, 7494189, 6438351, 2192694, 6167664, 2700795, 3260214, 4220269, 2705860, 3249268, 6381742, 4350345, 4831481, 6591483, 6115442, 3780238, 3237353, 6783312, 1158170, 1552693, 870240, 1716636, 2724613, 4238903, 4150251, 5253345, 197272, 6374769, 1579245, 7227202, 1157797, 5628607, 6541068, 6450679, 6096837, 3869300, 6005449, 4351467, 3338305, 7010027, 3895988, 3840699, 6617284, 3352756, 5835229, 1682087, 4773553, 1693333, 6203626, 2158015, 6521169, 234383, 2879550, 2327691, 2721585, 157204, 6002068, 3564216, 6079155, 5831093, 3884660, 2797587, 1429189, 2912114, 7441648, 268229, 942493, 4574788, 207076, 4709414, 2339785, 968464, 3013628, 3038420, 354856, 3785500, 7422433, 6653880, 653454, 2506561, 467499, 64745, 6122649, 2621265, 518470, 2754057, 1946959, 2577295, 2287031, 7375625, 1285433, 779850, 1521222, 853313, 124932, 2810674, 4687563, 6836818, 4378349, 6823806, 990682, 2705068, 4442803, 3785516, 3825826, 301536, 5775678, 35234, 4304271, 6018824, 7467953, 1959616, 3577009, 3339257, 672834, 1637695, 3401958, 2444105, 1551303, 1774213, 3192926, 1508641, 5633767, 5458057, 3785671, 1936906, 945733, 4697985, 6918843, 3779858, 5955177, 4001976, 1525157, 6718015, 864389, 2939859, 3363289, 7525918, 3981830, 3183291, 2897797, 72682, 2506361, 951760, 2932874, 7382704, 2867474, 5145490, 3243936, 6485564, 2767408, 5620838, 7644942, 1739155, 7087335, 2491912, 3352389, 2462460, 4761158, 389445, 957639, 4054696, 388597, 2238804, 1935888, 3633069, 3035948, 3750493, 7531522, 5600014, 3968378, 4914551, 1413080, 1124983, 768027, 6730623, 4447465, 4096016, 5119588, 5246686, 289570, 2643739, 7025143, 1678439, 5800427, 499727, 7360388, 6458260, 3480957, 521549, 3386953, 5120874, 6745796, 1726332, 6136599, 7557543, 3231512, 2782213, 4225276, 5186399, 5800932, 5289434, 2905158, 1809066, 3151832, 2087876, 4791510, 653883, 7430229, 449500, 2046001, 6955829, 4041311, 4101539, 2945617, 4748829, 3747008, 6296733, 3014513, 2986493, 5189767, 3867014, 5354077, 4101258, 2046181, 3162897, 6716713, 4972937, 2391578, 6538891, 593421, 2458258, 3385622, 6118746, 5970902, 3604045, 6719998, 3801755, 5468081, 910662, 2967391, 4103691, 1051436, 4202885, 3125589, 189188, 1150794, 3964063, 3226093, 5107986, 4839387, 1038799, 773037, 2618492, 3142485, 6447319, 891814, 7254529, 7515054, 1031449, 2460463, 4346705, 3281856, 929808, 3491434, 5835227, 2865808, 3002840, 6367271, 875711, 5380568, 3674501, 3260210, 17401, 4236012, 1319140, 1407447, 7417387, 5995926, 250386, 391256, 2817471, 5879813, 3642955, 2324573, 1129867, 4326511, 4234856, 3827371, 7588966, 5786502, 5393169, 6701077, 4013819, 2343341, 3748552, 1487120, 1216299, 1184273, 1347953, 3022710, 5366202, 389048, 5739676, 6841797, 5080308, 2130513, 6193070, 168611, 6906116, 2717448, 2643020, 3369556, 2143106, 296906, 1732743, 3090398, 5661486, 7070759, 2995920, 4241001, 1265017, 4647929, 6907433, 5429589, 5075423, 1353745, 7213140, 6081291, 3252889, 3358961, 2856152, 3031899, 3290347, 4490734, 3965469, 1206771, 1391796, 6498004, 6024459, 1285629, 395691, 6033176, 326962, 3221947, 4956086, 3067451, 5841773, 2117314, 3461185, 6033925, 1295028, 5344772, 278952, 2006294, 3386248, 7089933, 2776915, 1283781, 5433324, 5600192, 6968635, 2491367, 3536650, 7260665, 2264117, 7148738, 2406474, 3843358, 3743229, 4469146, 3140641, 103953, 1910911, 2791244, 6796172, 5151417, 5712176, 1332157, 6423342, 360111, 1006818, 6028093, 4710712, 2737735, 6354149, 1672572, 3970809, 7208701, 3864753, 3823313, 6016124, 4974914, 3126911, 468302, 6952673, 5938584, 3125681, 3463433, 6597184, 419382, 1789128, 486217, 2404281, 3423725, 5771710, 7407313, 6455466, 1402532, 478281, 1310912, 5552382, 6214997, 462499, 1565957, 6912891, 3648828, 7317461, 4880541, 6611716, 1794840, 3985228, 2817903, 2118648, 2474213, 3509611, 7583313, 1362311, 3007324, 1942784, 6181604, 6866066, 2608699, 4133854, 1639560, 2607285, 6818863, 6753148, 7292150, 5770669, 3022355, 2963543, 5492285, 5756353, 2091102, 3801012, 1862364, 386973, 5681904, 2676907, 529413, 5908678, 556311, 2561013, 1847566, 1345205, 520311, 2871853, 3598917, 4004539, 4277456, 4900069, 487235, 3011546, 4963306, 3538296, 3948490, 3460419, 6654202, 2425079, 1737602, 998330, 7407904, 2542677, 5168385, 1576549, 393549, 4259831, 3691635, 6387776, 5801850, 7128793, 6285106, 4085893, 195461, 4066938, 3782824, 6792598, 3670890, 6119426]

In [8]:
brands_to_remove = {}
for follower_idx in follower_ind:
    brand_idx = np.random.choice(BF_DATA.getcol(follower_idx).nonzero()[0])
    brands_to_remove[follower_idx] = brand_idx
    BF_DATA[brand_idx, follower_idx] = 0
    print('Removed edge', (brand_idx,follower_idx))


Removed edge (48, 122135)
Removed edge (98, 7541638)
Removed edge (147, 4162971)
Removed edge (68, 4455724)
Removed edge (161, 4037900)
Removed edge (107, 2619512)
Removed edge (93, 5575530)
Removed edge (147, 2412347)
Removed edge (48, 3823513)
Removed edge (53, 2991079)
Removed edge (53, 7505946)
Removed edge (182, 3456850)
Removed edge (125, 4863084)
Removed edge (32, 945506)
Removed edge (147, 4589273)
Removed edge (147, 608781)
Removed edge (170, 1617904)
Removed edge (184, 3963708)
Removed edge (56, 7438623)
Removed edge (186, 7391759)
Removed edge (67, 6423030)
Removed edge (21, 1167362)
Removed edge (49, 326457)
Removed edge (82, 4154920)
Removed edge (53, 505041)
Removed edge (182, 3697819)
Removed edge (98, 3510814)
Removed edge (32, 1870199)
Removed edge (48, 1870646)
Removed edge (38, 1623842)
Removed edge (83, 6482962)
Removed edge (147, 183730)
Removed edge (52, 607345)
Removed edge (35, 7576177)
Removed edge (52, 1000359)
Removed edge (49, 2759826)
Removed edge (37, 6790695)
Removed edge (101, 7148131)
Removed edge (52, 2697733)
Removed edge (157, 3595210)
Removed edge (35, 625880)
Removed edge (107, 1644062)
Removed edge (101, 2123767)
Removed edge (147, 4402982)
Removed edge (48, 2773610)
Removed edge (107, 1756431)
Removed edge (184, 830359)
Removed edge (101, 5069041)
Removed edge (83, 427124)
Removed edge (184, 4055957)
Removed edge (41, 2540593)
Removed edge (98, 1741430)
Removed edge (24, 1789752)
Removed edge (107, 2898680)
Removed edge (42, 7133688)
Removed edge (147, 2815593)
Removed edge (184, 163331)
Removed edge (86, 3622491)
Removed edge (147, 1697681)
Removed edge (67, 3698247)
Removed edge (184, 3456610)
Removed edge (53, 582260)
Removed edge (98, 6818833)
Removed edge (182, 6906597)
Removed edge (98, 7062060)
Removed edge (157, 4091701)
Removed edge (147, 2531413)
Removed edge (182, 1535829)
Removed edge (87, 5851641)
Removed edge (67, 1672627)
Removed edge (32, 4155762)
Removed edge (60, 3562149)
Removed edge (170, 1563437)
Removed edge (52, 4678491)
Removed edge (129, 2761654)
Removed edge (101, 133229)
Removed edge (76, 6162087)
Removed edge (49, 2599251)
Removed edge (68, 3203566)
Removed edge (184, 5104059)
Removed edge (49, 3683507)
Removed edge (21, 7179027)
Removed edge (37, 3091782)
Removed edge (10, 1394201)
Removed edge (78, 3770578)
Removed edge (0, 5984212)
Removed edge (52, 1043369)
Removed edge (68, 616705)
Removed edge (68, 4930991)
Removed edge (35, 6885797)
Removed edge (176, 1078348)
Removed edge (107, 4778289)
Removed edge (52, 2395435)
Removed edge (98, 3924562)
Removed edge (49, 2738323)
Removed edge (176, 3447414)
Removed edge (170, 5904057)
Removed edge (144, 6989171)
Removed edge (52, 5429648)
Removed edge (184, 6668732)
Removed edge (76, 2331113)
Removed edge (107, 1591003)
Removed edge (80, 6224831)
Removed edge (147, 2492112)
Removed edge (147, 3357288)
Removed edge (125, 1042348)
Removed edge (138, 3026294)
Removed edge (75, 4345336)
Removed edge (48, 326884)
Removed edge (53, 4955528)
Removed edge (52, 417101)
Removed edge (49, 834372)
Removed edge (37, 6758589)
Removed edge (101, 5348296)
Removed edge (53, 3804875)
Removed edge (48, 7114551)
Removed edge (38, 4209860)
Removed edge (101, 3801864)
Removed edge (184, 4612929)
Removed edge (49, 1365865)
Removed edge (176, 3171952)
Removed edge (184, 100527)
Removed edge (38, 5296430)
Removed edge (124, 2643537)
Removed edge (68, 1755327)
Removed edge (49, 3256351)
Removed edge (44, 2724019)
Removed edge (179, 5848717)
Removed edge (53, 4521330)
Removed edge (176, 2729270)
Removed edge (147, 114093)
Removed edge (98, 1606946)
Removed edge (157, 1455167)
Removed edge (49, 3051078)
Removed edge (68, 142873)
Removed edge (49, 5157047)
Removed edge (48, 924231)
Removed edge (157, 2448399)
Removed edge (167, 4994073)
Removed edge (167, 7231844)
Removed edge (167, 5325252)
Removed edge (46, 6900704)
Removed edge (157, 3773237)
Removed edge (184, 2688481)
Removed edge (184, 137112)
Removed edge (176, 4043277)
Removed edge (53, 2824153)
Removed edge (46, 7397365)
Removed edge (184, 64754)
Removed edge (35, 2379101)
Removed edge (24, 2038173)
Removed edge (67, 5005835)
Removed edge (67, 1518546)
Removed edge (83, 209951)
Removed edge (176, 1299422)
Removed edge (147, 5068376)
Removed edge (21, 6949500)
Removed edge (98, 369004)
Removed edge (184, 1070810)
Removed edge (184, 7051668)
Removed edge (96, 7282805)
Removed edge (147, 1899037)
Removed edge (98, 1537247)
Removed edge (157, 2058819)
Removed edge (147, 3677270)
Removed edge (170, 5067416)
Removed edge (48, 6064199)
Removed edge (22, 5183834)
Removed edge (68, 3187150)
Removed edge (48, 3765815)
Removed edge (40, 4148508)
Removed edge (80, 4435995)
Removed edge (49, 2916433)
Removed edge (147, 1554729)
Removed edge (176, 5355186)
Removed edge (185, 4572389)
Removed edge (68, 4870972)
Removed edge (147, 6632689)
Removed edge (48, 1539575)
Removed edge (176, 2522159)
Removed edge (147, 6599186)
Removed edge (4, 4890613)
Removed edge (184, 4278225)
Removed edge (189, 1396257)
Removed edge (37, 5691867)
Removed edge (80, 91087)
Removed edge (48, 2509823)
Removed edge (184, 6790197)
Removed edge (170, 4224639)
Removed edge (35, 7366135)
Removed edge (12, 4041160)
Removed edge (184, 6161160)
Removed edge (48, 2972708)
Removed edge (167, 1347635)
Removed edge (12, 6255153)
Removed edge (49, 2456173)
Removed edge (49, 4369919)
Removed edge (184, 2035064)
Removed edge (167, 7192381)
Removed edge (49, 917033)
Removed edge (7, 65027)
Removed edge (147, 7299697)
Removed edge (32, 4302738)
Removed edge (98, 6038286)
Removed edge (49, 1393705)
Removed edge (21, 5420219)
Removed edge (49, 6859925)
Removed edge (184, 2534836)
Removed edge (49, 7198750)
Removed edge (96, 1025580)
Removed edge (184, 6940985)
Removed edge (49, 7514609)
Removed edge (104, 1119678)
Removed edge (76, 7286466)
Removed edge (44, 5018133)
Removed edge (32, 5527429)
Removed edge (123, 4414994)
Removed edge (65, 4672356)
Removed edge (101, 7605833)
Removed edge (81, 4200290)
Removed edge (176, 3404243)
Removed edge (13, 2143113)
Removed edge (124, 3370069)
Removed edge (83, 1199563)
Removed edge (48, 3734270)
Removed edge (53, 2094975)
Removed edge (182, 2758277)
Removed edge (53, 2487443)
Removed edge (147, 1733278)
Removed edge (170, 7134990)
Removed edge (48, 3855050)
Removed edge (49, 1724549)
Removed edge (68, 1551312)
Removed edge (53, 6795949)
Removed edge (21, 2140950)
Removed edge (53, 2829855)
Removed edge (96, 6360162)
Removed edge (107, 6079510)
Removed edge (111, 2001945)
Removed edge (107, 3182492)
Removed edge (184, 595081)
Removed edge (147, 5140066)
Removed edge (83, 7291724)
Removed edge (147, 5566597)
Removed edge (82, 5988512)
Removed edge (129, 4561803)
Removed edge (57, 7217886)
Removed edge (147, 7494189)
Removed edge (184, 6438351)
Removed edge (87, 2192694)
Removed edge (49, 6167664)
Removed edge (21, 2700795)
Removed edge (176, 3260214)
Removed edge (184, 4220269)
Removed edge (98, 2705860)
Removed edge (184, 3249268)
Removed edge (167, 6381742)
Removed edge (53, 4350345)
Removed edge (20, 4831481)
Removed edge (21, 6591483)
Removed edge (87, 6115442)
Removed edge (87, 3780238)
Removed edge (32, 3237353)
Removed edge (53, 6783312)
Removed edge (147, 1158170)
Removed edge (170, 1552693)
Removed edge (167, 870240)
Removed edge (53, 1716636)
Removed edge (184, 2724613)
Removed edge (170, 4238903)
Removed edge (24, 4150251)
Removed edge (48, 5253345)
Removed edge (52, 197272)
Removed edge (179, 6374769)
Removed edge (194, 1579245)
Removed edge (32, 7227202)
Removed edge (49, 1157797)
Removed edge (147, 5628607)
Removed edge (107, 6541068)
Removed edge (92, 6450679)
Removed edge (157, 6096837)
Removed edge (52, 3869300)
Removed edge (194, 6005449)
Removed edge (170, 4351467)
Removed edge (53, 3338305)
Removed edge (98, 7010027)
Removed edge (83, 3895988)
Removed edge (71, 3840699)
Removed edge (98, 6617284)
Removed edge (53, 3352756)
Removed edge (194, 5835229)
Removed edge (53, 1682087)
Removed edge (170, 4773553)
Removed edge (48, 1693333)
Removed edge (49, 6203626)
Removed edge (35, 2158015)
Removed edge (186, 6521169)
Removed edge (67, 234383)
Removed edge (107, 2879550)
Removed edge (27, 2327691)
Removed edge (120, 2721585)
Removed edge (87, 157204)
Removed edge (147, 6002068)
Removed edge (184, 3564216)
Removed edge (53, 6079155)
Removed edge (21, 5831093)
Removed edge (184, 3884660)
Removed edge (176, 2797587)
Removed edge (182, 1429189)
Removed edge (52, 2912114)
Removed edge (68, 7441648)
Removed edge (147, 268229)
Removed edge (48, 942493)
Removed edge (53, 4574788)
Removed edge (101, 207076)
Removed edge (178, 4709414)
Removed edge (184, 2339785)
Removed edge (98, 968464)
Removed edge (147, 3013628)
Removed edge (53, 3038420)
Removed edge (176, 354856)
Removed edge (48, 3785500)
Removed edge (147, 7422433)
Removed edge (182, 6653880)
Removed edge (53, 653454)
Removed edge (167, 2506561)
Removed edge (48, 467499)
Removed edge (147, 64745)
Removed edge (53, 6122649)
Removed edge (116, 2621265)
Removed edge (101, 518470)
Removed edge (98, 2754057)
Removed edge (49, 1946959)
Removed edge (98, 2577295)
Removed edge (53, 2287031)
Removed edge (53, 7375625)
Removed edge (53, 1285433)
Removed edge (189, 779850)
Removed edge (92, 1521222)
Removed edge (53, 853313)
Removed edge (170, 124932)
Removed edge (56, 2810674)
Removed edge (98, 4687563)
Removed edge (52, 6836818)
Removed edge (91, 4378349)
Removed edge (147, 6823806)
Removed edge (83, 990682)
Removed edge (184, 2705068)
Removed edge (184, 4442803)
Removed edge (53, 3785516)
Removed edge (49, 3825826)
Removed edge (124, 301536)
Removed edge (147, 5775678)
Removed edge (184, 35234)
Removed edge (147, 4304271)
Removed edge (167, 6018824)
Removed edge (48, 7467953)
Removed edge (32, 1959616)
Removed edge (184, 3577009)
Removed edge (176, 3339257)
Removed edge (21, 672834)
Removed edge (49, 1637695)
Removed edge (147, 3401958)
Removed edge (38, 2444105)
Removed edge (53, 1551303)
Removed edge (53, 1774213)
Removed edge (184, 3192926)
Removed edge (98, 1508641)
Removed edge (32, 5633767)
Removed edge (35, 5458057)
Removed edge (167, 3785671)
Removed edge (38, 1936906)
Removed edge (194, 945733)
Removed edge (124, 4697985)
Removed edge (189, 6918843)
Removed edge (80, 3779858)
Removed edge (35, 5955177)
Removed edge (83, 4001976)
Removed edge (98, 1525157)
Removed edge (21, 6718015)
Removed edge (101, 864389)
Removed edge (68, 2939859)
Removed edge (35, 3363289)
Removed edge (189, 7525918)
Removed edge (184, 3981830)
Removed edge (48, 3183291)
Removed edge (49, 2897797)
Removed edge (187, 72682)
Removed edge (53, 2506361)
Removed edge (184, 951760)
Removed edge (151, 2932874)
Removed edge (24, 7382704)
Removed edge (12, 2867474)
Removed edge (166, 5145490)
Removed edge (184, 3243936)
Removed edge (101, 6485564)
Removed edge (53, 2767408)
Removed edge (128, 5620838)
Removed edge (53, 7644942)
Removed edge (179, 1739155)
Removed edge (176, 7087335)
Removed edge (49, 2491912)
Removed edge (49, 3352389)
Removed edge (49, 2462460)
Removed edge (98, 4761158)
Removed edge (38, 389445)
Removed edge (147, 957639)
Removed edge (7, 4054696)
Removed edge (48, 388597)
Removed edge (68, 2238804)
Removed edge (68, 1935888)
Removed edge (167, 3633069)
Removed edge (48, 3035948)
Removed edge (147, 3750493)
Removed edge (67, 7531522)
Removed edge (98, 5600014)
Removed edge (49, 3968378)
Removed edge (35, 4914551)
Removed edge (170, 1413080)
Removed edge (32, 1124983)
Removed edge (53, 768027)
Removed edge (83, 6730623)
Removed edge (46, 4447465)
Removed edge (98, 4096016)
Removed edge (26, 5119588)
Removed edge (48, 5246686)
Removed edge (49, 289570)
Removed edge (35, 2643739)
Removed edge (21, 7025143)
Removed edge (21, 1678439)
Removed edge (98, 5800427)
Removed edge (184, 499727)
Removed edge (41, 7360388)
Removed edge (52, 6458260)
Removed edge (107, 3480957)
Removed edge (80, 521549)
Removed edge (107, 3386953)
Removed edge (179, 5120874)
Removed edge (107, 6745796)
Removed edge (98, 1726332)
Removed edge (98, 6136599)
Removed edge (147, 7557543)
Removed edge (107, 3231512)
Removed edge (68, 2782213)
Removed edge (49, 4225276)
Removed edge (176, 5186399)
Removed edge (38, 5800932)
Removed edge (98, 5289434)
Removed edge (157, 2905158)
Removed edge (184, 1809066)
Removed edge (167, 3151832)
Removed edge (83, 2087876)
Removed edge (157, 4791510)
Removed edge (4, 653883)
Removed edge (101, 7430229)
Removed edge (189, 449500)
Removed edge (147, 2046001)
Removed edge (170, 6955829)
Removed edge (52, 4041311)
Removed edge (182, 4101539)
Removed edge (147, 2945617)
Removed edge (49, 4748829)
Removed edge (53, 3747008)
Removed edge (21, 6296733)
Removed edge (49, 3014513)
Removed edge (45, 2986493)
Removed edge (187, 5189767)
Removed edge (68, 3867014)
Removed edge (68, 5354077)
Removed edge (167, 4101258)
Removed edge (184, 2046181)
Removed edge (182, 3162897)
Removed edge (98, 6716713)
Removed edge (147, 4972937)
Removed edge (52, 2391578)
Removed edge (186, 6538891)
Removed edge (147, 593421)
Removed edge (21, 2458258)
Removed edge (83, 3385622)
Removed edge (57, 6118746)
Removed edge (49, 5970902)
Removed edge (184, 3604045)
Removed edge (157, 6719998)
Removed edge (167, 3801755)
Removed edge (176, 5468081)
Removed edge (107, 910662)
Removed edge (52, 2967391)
Removed edge (35, 4103691)
Removed edge (38, 1051436)
Removed edge (48, 4202885)
Removed edge (86, 3125589)
Removed edge (147, 189188)
Removed edge (86, 1150794)
Removed edge (48, 3964063)
Removed edge (184, 3226093)
Removed edge (147, 5107986)
Removed edge (68, 4839387)
Removed edge (147, 1038799)
Removed edge (68, 773037)
Removed edge (147, 2618492)
Removed edge (138, 3142485)
Removed edge (194, 6447319)
Removed edge (147, 891814)
Removed edge (53, 7254529)
Removed edge (53, 7515054)
Removed edge (96, 1031449)
Removed edge (24, 2460463)
Removed edge (67, 4346705)
Removed edge (21, 3281856)
Removed edge (98, 929808)
Removed edge (187, 3491434)
Removed edge (129, 5835227)
Removed edge (83, 2865808)
Removed edge (49, 3002840)
Removed edge (167, 6367271)
Removed edge (53, 875711)
Removed edge (184, 5380568)
Removed edge (21, 3674501)
Removed edge (70, 3260210)
Removed edge (157, 17401)
Removed edge (157, 4236012)
Removed edge (186, 1319140)
Removed edge (147, 1407447)
Removed edge (166, 7417387)
Removed edge (53, 5995926)
Removed edge (101, 250386)
Removed edge (53, 391256)
Removed edge (21, 2817471)
Removed edge (49, 5879813)
Removed edge (52, 3642955)
Removed edge (19, 2324573)
Removed edge (124, 1129867)
Removed edge (157, 4326511)
Removed edge (184, 4234856)
Removed edge (83, 3827371)
Removed edge (44, 7588966)
Removed edge (123, 5786502)
Removed edge (98, 5393169)
Removed edge (101, 6701077)
Removed edge (124, 4013819)
Removed edge (53, 2343341)
Removed edge (35, 3748552)
Removed edge (98, 1487120)
Removed edge (182, 1216299)
Removed edge (42, 1184273)
Removed edge (52, 1347953)
Removed edge (49, 3022710)
Removed edge (182, 5366202)
Removed edge (147, 389048)
Removed edge (147, 5739676)
Removed edge (194, 6841797)
Removed edge (166, 5080308)
Removed edge (185, 2130513)
Removed edge (41, 6193070)
Removed edge (32, 168611)
Removed edge (12, 6906116)
Removed edge (184, 2717448)
Removed edge (21, 2643020)
Removed edge (78, 3369556)
Removed edge (68, 2143106)
Removed edge (184, 296906)
Removed edge (98, 1732743)
Removed edge (117, 3090398)
Removed edge (24, 5661486)
Removed edge (53, 7070759)
Removed edge (189, 2995920)
Removed edge (184, 4241001)
Removed edge (184, 1265017)
Removed edge (187, 4647929)
Removed edge (21, 6907433)
Removed edge (191, 5429589)
Removed edge (73, 5075423)
Removed edge (21, 1353745)
Removed edge (124, 7213140)
Removed edge (35, 6081291)
Removed edge (176, 3252889)
Removed edge (139, 3358961)
Removed edge (19, 2856152)
Removed edge (184, 3031899)
Removed edge (167, 3290347)
Removed edge (98, 4490734)
Removed edge (48, 3965469)
Removed edge (147, 1206771)
Removed edge (83, 1391796)
Removed edge (124, 6498004)
Removed edge (184, 6024459)
Removed edge (70, 1285629)
Removed edge (182, 395691)
Removed edge (182, 6033176)
Removed edge (52, 326962)
Removed edge (182, 3221947)
Removed edge (170, 4956086)
Removed edge (184, 3067451)
Removed edge (187, 5841773)
Removed edge (92, 2117314)
Removed edge (101, 3461185)
Removed edge (126, 6033925)
Removed edge (184, 1295028)
Removed edge (96, 5344772)
Removed edge (35, 278952)
Removed edge (184, 2006294)
Removed edge (167, 3386248)
Removed edge (24, 7089933)
Removed edge (80, 2776915)
Removed edge (123, 1283781)
Removed edge (115, 5433324)
Removed edge (132, 5600192)
Removed edge (147, 6968635)
Removed edge (53, 2491367)
Removed edge (167, 3536650)
Removed edge (170, 7260665)
Removed edge (48, 2264117)
Removed edge (51, 7148738)
Removed edge (45, 2406474)
Removed edge (176, 3843358)
Removed edge (52, 3743229)
Removed edge (56, 4469146)
Removed edge (184, 3140641)
Removed edge (83, 103953)
Removed edge (98, 1910911)
Removed edge (147, 2791244)
Removed edge (49, 6796172)
Removed edge (176, 5151417)
Removed edge (98, 5712176)
Removed edge (167, 1332157)
Removed edge (98, 6423342)
Removed edge (49, 360111)
Removed edge (93, 1006818)
Removed edge (144, 6028093)
Removed edge (27, 4710712)
Removed edge (98, 2737735)
Removed edge (157, 6354149)
Removed edge (184, 1672572)
Removed edge (86, 3970809)
Removed edge (144, 7208701)
Removed edge (83, 3864753)
Removed edge (49, 3823313)
Removed edge (181, 6016124)
Removed edge (96, 4974914)
Removed edge (167, 3126911)
Removed edge (49, 468302)
Removed edge (48, 6952673)
Removed edge (184, 5938584)
Removed edge (53, 3125681)
Removed edge (176, 3463433)
Removed edge (184, 6597184)
Removed edge (170, 419382)
Removed edge (147, 1789128)
Removed edge (184, 486217)
Removed edge (53, 2404281)
Removed edge (184, 3423725)
Removed edge (138, 5771710)
Removed edge (142, 7407313)
Removed edge (21, 6455466)
Removed edge (49, 1402532)
Removed edge (170, 478281)
Removed edge (170, 1310912)
Removed edge (93, 5552382)
Removed edge (124, 6214997)
Removed edge (176, 462499)
Removed edge (184, 1565957)
Removed edge (138, 6912891)
Removed edge (104, 3648828)
Removed edge (167, 7317461)
Removed edge (49, 4880541)
Removed edge (56, 6611716)
Removed edge (161, 1794840)
Removed edge (53, 3985228)
Removed edge (157, 2817903)
Removed edge (126, 2118648)
Removed edge (49, 2474213)
Removed edge (167, 3509611)
Removed edge (157, 7583313)
Removed edge (101, 1362311)
Removed edge (68, 3007324)
Removed edge (186, 1942784)
Removed edge (32, 6181604)
Removed edge (49, 6866066)
Removed edge (83, 2608699)
Removed edge (21, 4133854)
Removed edge (48, 1639560)
Removed edge (15, 2607285)
Removed edge (53, 6818863)
Removed edge (53, 6753148)
Removed edge (98, 7292150)
Removed edge (41, 5770669)
Removed edge (24, 3022355)
Removed edge (53, 2963543)
Removed edge (135, 5492285)
Removed edge (49, 5756353)
Removed edge (167, 2091102)
Removed edge (53, 3801012)
Removed edge (170, 1862364)
Removed edge (21, 386973)
Removed edge (157, 5681904)
Removed edge (178, 2676907)
Removed edge (139, 529413)
Removed edge (86, 5908678)
Removed edge (164, 556311)
Removed edge (98, 2561013)
Removed edge (87, 1847566)
Removed edge (147, 1345205)
Removed edge (98, 520311)
Removed edge (1, 2871853)
Removed edge (53, 3598917)
Removed edge (98, 4004539)
Removed edge (126, 4277456)
Removed edge (53, 4900069)
Removed edge (48, 487235)
Removed edge (98, 3011546)
Removed edge (157, 4963306)
Removed edge (152, 3538296)
Removed edge (38, 3948490)
Removed edge (48, 3460419)
Removed edge (68, 6654202)
Removed edge (100, 2425079)
Removed edge (53, 1737602)
Removed edge (101, 998330)
Removed edge (157, 7407904)
Removed edge (182, 2542677)
Removed edge (35, 5168385)
Removed edge (68, 1576549)
Removed edge (68, 393549)
Removed edge (87, 4259831)
Removed edge (48, 3691635)
Removed edge (192, 6387776)
Removed edge (82, 5801850)
Removed edge (83, 7128793)
Removed edge (83, 6285106)
Removed edge (184, 4085893)
Removed edge (176, 195461)
Removed edge (176, 4066938)
Removed edge (184, 3782824)
Removed edge (82, 6792598)
Removed edge (48, 3670890)
Removed edge (37, 6119426)

In [9]:
def get_similarity_matrix(data='BF_DATA', similarity='Jaccard'):
    
    if similarity == 'Jaccard' and data=='BF_DATA':
        
        sim_mat = np.zeros((N_BRANDS,N_BRANDS))
        
        for brand_pair in combinations(range(N_BRANDS),2):
            
            b1 = BF_DATA.getrow(brand_pair[0])
            b2 = BF_DATA.getrow(brand_pair[1])
            
            common_connections = b1.multiply(b2).sum()
            total_connections = b1.sum() + b2.sum() - common_connections
            sim = common_connections/total_connections
            
            sim_mat[brand_pair[0] , brand_pair[1]] = sim_mat[brand_pair[1] , brand_pair[0]] = sim
            
    return sim_mat

In [10]:
SIM_MAT = get_similarity_matrix()

In [11]:
def predict_rank(scores, norm_factor, removed_brand):
    rank = 0
    for tup in sorted(scores.items(), key=lambda x: x[1]/norm_factor, reverse=True):
        rank += 1
        if tup[0] == removed_brand:
            return rank
    return None

In [12]:
def check_overlap(user_idx, brand_followed, brand_not_followed, brand_removed):
    if brand_removed == brand_not_followed:
        if SIM_MAT[brand_followed,brand_not_followed] == 0:
            print('[BR]No Overlap between',BRANDS_LIST[brand_followed], BRANDS_LIST[brand_not_followed])
            return True
        else:
            b1 = BF_DATA.getrow(brand_followed)
            b2 = BF_DATA.getrow(brand_not_followed)
            if b1.multiply(b2).sum() == 0:
                print('[AR]No Overlap between',BRANDS_LIST[brand_followed], BRANDS_LIST[brand_not_followed])
                return True
    return False

In [13]:
prediction_rank = dict()
for follower_idx in follower_ind:
    brands_followed = BF_DATA.getcol(follower_idx).nonzero()[0]
    brands_not_followed = np.delete(np.arange(N_BRANDS),brands_followed)
    scores = {}
    no_overlaps = {}
    for brand_not_followed in brands_not_followed:
        scores[brand_not_followed] = 0
        for brand_followed in brands_followed:
            scores[brand_not_followed] += SIM_MAT[brand_followed,brand_not_followed]
            #if check_overlap(follower_idx, brand_followed, brand_not_followed,brands_to_remove[follower_idx]):
                #print('Yikes!',brand_followed, brand_not_followed)
    prediction_rank[follower_idx] = predict_rank(scores, len(brands_followed), brands_to_remove[follower_idx])

In [14]:
list(prediction_rank.items())


Out[14]:
[(1167362, 10),
 (124932, 3),
 (5879813, 2),
 (6119426, 13),
 (499727, 1),
 (4096016, 1),
 (1353745, 7),
 (2797587, 3),
 (6701077, 4),
 (7505946, 1),
 (4435995, 5),
 (7198750, 1),
 (3256351, 1),
 (6367271, 17),
 (2046001, 2),
 (1347635, 10),
 (2643537, 8),
 (1295028, 1),
 (2879550, 15),
 (6387776, 3),
 (3338305, 1),
 (6064199, 2),
 (2406474, 17),
 (2916433, 1),
 (7213140, 4),
 (2531413, 1),
 (5151417, 2),
 (391256, 1),
 (2324573, 35),
 (3192926, 1),
 (929808, 1),
 (3183291, 2),
 (133229, 1),
 (6115442, 9),
 (7282805, 1),
 (520311, 1),
 (2619512, 2),
 (1910911, 1),
 (3840699, 55),
 (1724549, 2),
 (5189767, 9),
 (5458057, 2),
 (2932874, 29),
 (7286466, 5),
 (653454, 2),
 (2738323, 1),
 (2001945, 65),
 (4880541, 1),
 (1429189, 6),
 (5988512, 28),
 (3825826, 1),
 (768027, 2),
 (3782824, 1),
 (6455466, 12),
 (2676907, 2),
 (3051078, 1),
 (6381742, 1),
 (100527, 1),
 (3864753, 2),
 (2724019, 4),
 (3352756, 1),
 (2607285, 4),
 (5157047, 2),
 (4001976, 2),
 (6758589, 11),
 (1755327, 1),
 (1310912, 2),
 (3804875, 1),
 (6617284, 1),
 (5908678, 11),
 (3538296, 75),
 (3770578, 16),
 (6447319, 6),
 (5253345, 6),
 (207076, 1),
 (2474213, 1),
 (3401958, 1),
 (3823313, 1),
 (6203626, 1),
 (1579245, 6),
 (3358961, 96),
 (2425079, 55),
 (4831481, 6),
 (6654202, 1),
 (4225276, 1),
 (5552382, 30),
 (616705, 1),
 (4900069, 1),
 (6906116, 4),
 (3463433, 1),
 (1847566, 5),
 (1487120, 1),
 (2867474, 2),
 (3385622, 2),
 (7128793, 2),
 (6818863, 1),
 (7438623, 10),
 (7407904, 1),
 (1554729, 1),
 (5296430, 13),
 (5712176, 1),
 (4778289, 1),
 (1552693, 2),
 (4350345, 2),
 (4710712, 40),
 (6940985, 12),
 (529413, 71),
 (5775678, 1),
 (2264117, 4),
 (4974914, 1),
 (389445, 10),
 (4761158, 2),
 (393549, 1),
 (6783312, 1),
 (6521169, 10),
 (2599251, 1),
 (2238804, 1),
 (3125589, 20),
 (5183834, 43),
 (7231844, 5),
 (369004, 2),
 (1347953, 1),
 (4155762, 1),
 (1870199, 1),
 (3674501, 3),
 (3867014, 1),
 (1362311, 15),
 (4972937, 1),
 (5429648, 1),
 (1739155, 52),
 (7051668, 1),
 (4469146, 2),
 (1716636, 5),
 (942493, 2),
 (35234, 1),
 (6885797, 17),
 (7557543, 1),
 (278952, 4),
 (395691, 11),
 (2343341, 1),
 (2038173, 24),
 (3125681, 7),
 (5831093, 6),
 (5366202, 120),
 (3221947, 14),
 (6668732, 1),
 (5771710, 16),
 (2705860, 11),
 (4041160, 5),
 (2643020, 29),
 (3187150, 1),
 (1038799, 1),
 (3404243, 5),
 (1407447, 2),
 (5380568, 1),
 (3363289, 19),
 (5691867, 19),
 (5835229, 5),
 (5075423, 17),
 (301536, 5),
 (7422433, 1),
 (4013819, 2),
 (2331113, 5),
 (5800427, 1),
 (3226093, 1),
 (3203566, 4),
 (7514609, 1),
 (1206771, 1),
 (6423030, 9),
 (7025143, 3),
 (7133688, 19),
 (7260665, 4),
 (2986493, 10),
 (6719998, 2),
 (7430229, 6),
 (7254529, 1),
 (6033925, 36),
 (3981830, 1),
 (5005835, 26),
 (4043277, 2),
 (7391759, 32),
 (1935888, 2),
 (1184273, 17),
 (250386, 1),
 (5018133, 12),
 (6611716, 4),
 (3965469, 2),
 (3510814, 1),
 (354856, 2),
 (467499, 2),
 (7062060, 1),
 (7494189, 1),
 (2767408, 2),
 (6255153, 39),
 (2091102, 8),
 (7375625, 1),
 (653883, 34),
 (6718015, 4),
 (6597184, 1),
 (2058819, 4),
 (3598917, 1),
 (924231, 6),
 (6541068, 1),
 (2130513, 26),
 (6836818, 2),
 (3369556, 22),
 (6038286, 2),
 (5354077, 1),
 (4041311, 2),
 (4085893, 1),
 (5104059, 1),
 (7588966, 4),
 (3162897, 5),
 (3357288, 2),
 (5904057, 2),
 (2773610, 1),
 (2456173, 1),
 (7299697, 1),
 (7179027, 4),
 (3869300, 1),
 (1741430, 1),
 (1124983, 1),
 (2817471, 1),
 (6949500, 2),
 (6354149, 2),
 (1774213, 4),
 (2865808, 3),
 (3231512, 1),
 (2458258, 4),
 (197272, 1),
 (3252889, 5),
 (3801755, 1),
 (1733278, 1),
 (7134990, 4),
 (168611, 4),
 (3562149, 56),
 (1682087, 1),
 (1299422, 19),
 (1809066, 5),
 (6795949, 2),
 (4442803, 1),
 (3895988, 1),
 (3564216, 1),
 (427124, 4),
 (6918843, 2),
 (5628607, 1),
 (4004539, 1),
 (3748552, 1),
 (6005449, 33),
 (3855050, 2),
 (1862364, 3),
 (7366135, 3),
 (7217886, 5),
 (6181604, 1),
 (6906597, 7),
 (6016124, 49),
 (4236012, 2),
 (5681904, 2),
 (2898680, 7),
 (2462460, 1),
 (3734270, 1),
 (189188, 1),
 (2724613, 8),
 (6161160, 1),
 (3035948, 2),
 (6081291, 1),
 (5600014, 1),
 (5393169, 6),
 (2492112, 1),
 (5661486, 17),
 (2140950, 3),
 (6136599, 1),
 (1794840, 61),
 (3785500, 1),
 (5289434, 2),
 (864389, 4),
 (289570, 1),
 (5835227, 8),
 (1732743, 2),
 (1051436, 12),
 (1563437, 9),
 (6423342, 6),
 (2460463, 13),
 (2810674, 1),
 (6955829, 3),
 (1870646, 2),
 (326457, 1),
 (4870972, 13),
 (6028093, 56),
 (3773237, 3),
 (4612929, 1),
 (834372, 1),
 (2444105, 8),
 (7291724, 2),
 (4346705, 36),
 (3142485, 3),
 (3031899, 1),
 (3007324, 2),
 (5186399, 7),
 (4672356, 8),
 (3963708, 1),
 (5120874, 36),
 (6866066, 1),
 (6912891, 1),
 (6753148, 2),
 (6730623, 2),
 (6285106, 5),
 (2143106, 1),
 (195461, 1),
 (5786502, 1),
 (3386248, 7),
 (2143113, 76),
 (2542677, 4),
 (4561803, 1),
 (6796172, 1),
 (234383, 12),
 (5145490, 6),
 (6458260, 41),
 (4055957, 1),
 (830359, 1),
 (3386953, 2),
 (3352389, 3),
 (5770669, 41),
 (891814, 1),
 (1000359, 2),
 (1043369, 2),
 (773037, 3),
 (7515054, 1),
 (7467953, 1),
 (2761654, 104),
 (486217, 1),
 (998330, 9),
 (1332157, 17),
 (6224831, 31),
 (5600192, 90),
 (2087876, 3),
 (3785671, 1),
 (5348296, 1),
 (2791244, 44),
 (3595210, 1),
 (1539575, 5),
 (91087, 5),
 (1551312, 1),
 (1518546, 18),
 (6653880, 8),
 (5970902, 1),
 (5851641, 3),
 (3011546, 26),
 (449500, 27),
 (4133854, 3),
 (6900704, 1),
 (5800932, 8),
 (2491367, 3),
 (72682, 18),
 (4150251, 21),
 (1285629, 1),
 (2561013, 1),
 (2118648, 70),
 (4647929, 18),
 (2963543, 1),
 (6591483, 4),
 (3013628, 2),
 (2509823, 3),
 (7531522, 19),
 (2782213, 4),
 (2448399, 10),
 (6818833, 1),
 (6482962, 13),
 (163331, 9),
 (6079510, 4),
 (4994073, 3),
 (1158170, 1),
 (209951, 4),
 (3140641, 1),
 (4678491, 1),
 (2972708, 2),
 (4709414, 2),
 (7070759, 1),
 (1393705, 1),
 (2522159, 2),
 (2540593, 85),
 (6790197, 1),
 (1455167, 5),
 (672834, 2),
 (2905158, 12),
 (478281, 2),
 (1078348, 1),
 (608781, 1),
 (3370069, 3),
 (3677270, 1),
 (6360162, 7),
 (5620838, 4),
 (1678439, 7),
 (4863084, 11),
 (6599186, 1),
 (4326511, 2),
 (6167664, 1),
 (607345, 1),
 (3691635, 2),
 (3249268, 1),
 (2618492, 2),
 (5575530, 44),
 (1639560, 2),
 (595081, 1),
 (4101258, 2),
 (2327691, 50),
 (5841773, 12),
 (2759826, 1),
 (2487443, 1),
 (6859925, 1),
 (1394201, 84),
 (6122649, 2),
 (3697819, 10),
 (5739676, 7),
 (6296733, 4),
 (3964063, 2),
 (556311, 22),
 (1899037, 1),
 (3577009, 1),
 (3683507, 1),
 (1391796, 18),
 (7525918, 6),
 (5420219, 5),
 (875711, 1),
 (3747008, 2),
 (7148738, 9),
 (4209860, 10),
 (6792598, 39),
 (957639, 130),
 (1789128, 1),
 (4277456, 34),
 (505041, 1),
 (3038420, 1),
 (4791510, 1),
 (625880, 3),
 (1537247, 1),
 (1006818, 40),
 (326884, 2),
 (4572389, 9),
 (7087335, 3),
 (64745, 1),
 (3290347, 1),
 (7441648, 1),
 (6632689, 1),
 (64754, 1),
 (5080308, 11),
 (7292150, 9),
 (6907433, 3),
 (1942784, 13),
 (5168385, 42),
 (1565957, 2),
 (6024459, 1),
 (4037900, 83),
 (1737602, 2),
 (1756431, 15),
 (2856152, 55),
 (3779858, 12),
 (2006294, 3),
 (122135, 2),
 (1031449, 11),
 (4148508, 1),
 (3843358, 2),
 (5527429, 1),
 (1508641, 1),
 (1606946, 2),
 (4447465, 1),
 (7541638, 2),
 (6716713, 1),
 (2395435, 2),
 (4455724, 5),
 (326962, 1),
 (2192694, 2),
 (1285433, 2),
 (6968635, 1),
 (3648828, 51),
 (1637695, 2),
 (853313, 1),
 (3460419, 15),
 (3091782, 23),
 (3785516, 1),
 (3765815, 2),
 (417101, 2),
 (468302, 1),
 (1946959, 4),
 (6214997, 11),
 (1319140, 17),
 (6118746, 8),
 (2577295, 1),
 (2379101, 3),
 (2046181, 3),
 (945506, 4),
 (3067451, 1),
 (1697681, 1),
 (3509611, 1),
 (4220269, 1),
 (6374769, 42),
 (4521330, 2),
 (6989171, 145),
 (3456610, 1),
 (3026294, 10),
 (4914551, 1),
 (2035064, 1),
 (1265017, 1),
 (3968378, 1),
 (1672572, 7),
 (3480957, 3),
 (518470, 3),
 (4955528, 1),
 (1129867, 8),
 (4304271, 1),
 (6002068, 1),
 (5995926, 4),
 (5938584, 1),
 (4162971, 1),
 (4101539, 12),
 (1525157, 1),
 (5069041, 1),
 (2737735, 1),
 (114093, 1),
 (4930991, 1),
 (183730, 1),
 (1672627, 4),
 (2534836, 2),
 (2287031, 1),
 (1119678, 24),
 (2158015, 10),
 (5756353, 2),
 (6841797, 6),
 (3801864, 2),
 (1199563, 5),
 (6438351, 2),
 (951760, 1),
 (990682, 4),
 (2688481, 1),
 (3002840, 1),
 (2945617, 1),
 (3237353, 7),
 (4351467, 6),
 (2697733, 3),
 (3423725, 1),
 (4490734, 34),
 (388597, 1),
 (6450679, 11),
 (4345336, 24),
 (2700795, 21),
 (3743229, 1),
 (4369919, 1),
 (65027, 6),
 (5344772, 1),
 (2491912, 1),
 (2754057, 1),
 (1936906, 6),
 (4103691, 7),
 (593421, 1),
 (103953, 2),
 (4414994, 2),
 (3022355, 13),
 (157204, 64),
 (142873, 4),
 (2391578, 1),
 (4748829, 1),
 (1644062, 1),
 (2829855, 4),
 (1396257, 23),
 (3461185, 1),
 (6790695, 19),
 (4154920, 28),
 (917033, 1),
 (7417387, 4),
 (1025580, 3),
 (3750493, 1),
 (419382, 3),
 (4238903, 2),
 (2608699, 26),
 (6485564, 1),
 (5492285, 1),
 (4574788, 1),
 (945733, 5),
 (1521222, 27),
 (3698247, 9),
 (7605833, 4),
 (779850, 4),
 (3642955, 1),
 (3604045, 1),
 (7583313, 4),
 (7148131, 1),
 (5068376, 1),
 (3622491, 9),
 (5140066, 1),
 (5119588, 6),
 (1576549, 1),
 (4234856, 1),
 (4241001, 1),
 (3491434, 15),
 (5107986, 1),
 (3171952, 14),
 (3884660, 1),
 (2815593, 1),
 (2506361, 1),
 (4066938, 2),
 (3924562, 3),
 (4224639, 4),
 (3281856, 2),
 (2758277, 6),
 (6538891, 23),
 (5848717, 40),
 (3780238, 9),
 (1693333, 1),
 (268229, 1),
 (462499, 1),
 (1402532, 1),
 (7576177, 2),
 (4202885, 14),
 (3827371, 1),
 (2705068, 1),
 (3447414, 4),
 (2717448, 1),
 (360111, 2),
 (7382704, 17),
 (4773553, 3),
 (5355186, 2),
 (1345205, 1),
 (1551303, 1),
 (2339785, 1),
 (582260, 1),
 (3948490, 7),
 (1959616, 1),
 (2117314, 11),
 (6745796, 1),
 (1283781, 8),
 (4687563, 1),
 (2995920, 2),
 (7407313, 30),
 (3339257, 2),
 (6498004, 5),
 (4589273, 1),
 (1070810, 2),
 (1591003, 2),
 (5246686, 2),
 (6952673, 3),
 (5633767, 3),
 (7010027, 2),
 (4378349, 109),
 (2939859, 2),
 (3970809, 17),
 (3126911, 3),
 (7208701, 11),
 (6018824, 1),
 (3536650, 3),
 (5325252, 2),
 (7089933, 24),
 (7644942, 1),
 (968464, 5),
 (1413080, 3),
 (6079155, 1),
 (6033176, 12),
 (2643739, 3),
 (5566597, 80),
 (1623842, 10),
 (4402982, 1),
 (1216299, 7),
 (2721585, 75),
 (3260210, 1),
 (4091701, 12),
 (3260214, 6),
 (7114551, 1),
 (1789752, 26),
 (3670890, 1),
 (2412347, 1),
 (7192381, 3),
 (2506561, 2),
 (7227202, 1),
 (487235, 2),
 (2729270, 3),
 (5955177, 5),
 (1150794, 10),
 (3985228, 1),
 (2621265, 54),
 (3456850, 4),
 (2776915, 6),
 (1535829, 15),
 (2967391, 1),
 (870240, 1),
 (4200290, 12),
 (1365865, 1),
 (2991079, 1),
 (2817903, 3),
 (3014513, 2),
 (2912114, 1),
 (3022710, 1),
 (5801850, 29),
 (1726332, 1),
 (4963306, 3),
 (6823806, 1),
 (2094975, 5),
 (4697985, 3),
 (7360388, 46),
 (2897797, 1),
 (5067416, 4),
 (4302738, 22),
 (137112, 1),
 (3823513, 1),
 (3182492, 8),
 (386973, 7),
 (3243936, 2),
 (910662, 4),
 (1042348, 6),
 (3633069, 2),
 (6193070, 75),
 (5468081, 9),
 (3801012, 1),
 (4956086, 3),
 (389048, 1),
 (2404281, 2),
 (4890613, 26),
 (6096837, 7),
 (296906, 18),
 (4259831, 4),
 (521549, 4),
 (4278225, 1),
 (5984212, 61),
 (7317461, 2),
 (17401, 4),
 (3151832, 3),
 (2824153, 1),
 (4839387, 2),
 (2871853, 86),
 (3090398, 12),
 (1157797, 1),
 (6162087, 2),
 (5433324, 9),
 (1617904, 2),
 (4054696, 7),
 (7397365, 1),
 (2123767, 31),
 (5429589, 23)]

In [15]:
from functools import reduce
rank_sum = reduce(lambda x,y: x+y, prediction_rank.values())
print('Mean Rank', rank_sum/len(prediction_rank))


Mean Rank 8.891304347826088

In [16]:
hist, bins = np.histogram(list(prediction_rank.values()),bins=50)    
width = 0.9 * (bins[1] - bins[0])
center = (bins[:-1] + bins[1:]) / 2
#plt.yscale("log")
plt.bar(center, hist, align='center', width=width)
plt.title('Rank Distribution')


Out[16]:
<matplotlib.text.Text at 0x2e86f134a8>

In [17]:
mean_rank = {}
brands_followed_count = {}
for k in prediction_rank:
    count = len(BF_DATA.getcol(k).nonzero()[0])
    if count in mean_rank.keys():
        mean_rank[count] += prediction_rank[k]
        brands_followed_count[count] += 1
    else:
        mean_rank[count] = prediction_rank[k]
        brands_followed_count[count] = 1

In [18]:
brands_followed_count


Out[18]:
{4: 319,
 5: 167,
 6: 89,
 7: 55,
 8: 37,
 9: 11,
 10: 16,
 11: 12,
 12: 4,
 13: 3,
 14: 4,
 15: 2,
 16: 3,
 17: 2,
 18: 1,
 19: 1,
 20: 1,
 21: 2,
 22: 1,
 23: 1,
 26: 1,
 27: 1,
 29: 1,
 30: 1,
 44: 1}

In [19]:
for k in mean_rank:
    mean_rank[k] //= brands_followed_count[k]
mean_rank


Out[19]:
{4: 8,
 5: 6,
 6: 8,
 7: 12,
 8: 6,
 9: 11,
 10: 4,
 11: 19,
 12: 3,
 13: 2,
 14: 12,
 15: 8,
 16: 27,
 17: 21,
 18: 5,
 19: 22,
 20: 54,
 21: 19,
 22: 30,
 23: 23,
 26: 9,
 27: 12,
 29: 34,
 30: 8,
 44: 26}

In [20]:
data_points = mean_rank.items()
x = [p[0] for p in data_points]
y = [p[1] for p in data_points]
plt.bar(x,y)
plt.title('Average Rank by number of brands followed')


Out[20]:
<matplotlib.text.Text at 0x2e8af762e8>

In [21]:
good_predictions = []
bad_predictions = []
max_rank = max([p[1] for p in data_points])
for k in prediction_rank:
    if prediction_rank[k] >= max_rank*0.5:
        bad_predictions.append(brands_to_remove[k])
    else:
        good_predictions.append(brands_to_remove[k])
final_list = list(set(bad_predictions) - set(good_predictions))
final_list.sort()
for b in final_list:
    print(BRANDS_LIST[b])


3mnews
acneorg
aquafina
babobotanicals
belkin
bissellclean
bwaynails
dameelizabeth
fishernutsbrand
govoskos
huggies
jewelosco
kelloggcompany
lashem
mineralfusion
montagnejeuness
nakedpizza
onecoconut
ouidad
preschoice
proactiv
realcapncrunch
reallunchables
samsung
tedgibson
thermador

In [22]:
stop = datetime.now()
print('Time taken',stop-start)


Time taken 0:05:01.799437

Time taken 0:02:27.579176


In [ ]: